library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
✓ ggplot2 3.3.2     ✓ purrr   0.3.4
✓ tibble  3.0.3     ✓ dplyr   1.0.2
✓ tidyr   1.1.1     ✓ stringr 1.4.0
✓ readr   1.3.1     ✓ forcats 0.5.0
── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(janitor)

Attaching package: ‘janitor’

The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test
library(lubridate)

Attaching package: ‘lubridate’

The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union
library(plotly)
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio

Attaching package: ‘plotly’

The following object is masked from ‘package:ggplot2’:

    last_plot

The following object is masked from ‘package:stats’:

    filter

The following object is masked from ‘package:graphics’:

    layout
# Read the TidyTuesday (2020-07-14) astronauts CSV directly from GitHub.
astronauts = read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-14/astronauts.csv')
Parsed with column specification:
cols(
  .default = col_double(),
  name = col_character(),
  original_name = col_character(),
  sex = col_character(),
  nationality = col_character(),
  military_civilian = col_character(),
  selection = col_character(),
  occupation = col_character(),
  mission_title = col_character(),
  ascend_shuttle = col_character(),
  in_orbit = col_character(),
  descend_shuttle = col_character()
)
See spec(...) for full column specifications.
# Convert the categorical columns to factors and pass the three year columns
# through a "%Y" date parse, keeping only the year component.
# NOTE(review): the year columns are read via `astronauts$...` (the data frame
# as it existed before this pipe) rather than through mutate()'s data mask.
astronauts = astronauts %>% 
  mutate(
    sex = as.factor(sex),
    year_of_birth = year(parse_date(as.character(astronauts$year_of_birth), "%Y")),
    nationality = as.factor(nationality),
    selection = as.factor(selection),
    year_of_selection = year(parse_date(as.character(astronauts$year_of_selection), "%Y")),
    mission_number = as.factor(mission_number),
    occupation = as.factor(occupation),
    year_of_mission = year(parse_date(as.character(astronauts$year_of_mission), "%Y")),
    mission_title = as.factor(mission_title),
    ascend_shuttle = as.factor(ascend_shuttle),
    in_orbit = as.factor(in_orbit),
    descend_shuttle = as.factor(descend_shuttle)
  )
# Print the converted table.
astronauts
# Read The Economist's space-launches CSV (launches table) directly from GitHub.
launches = read_csv("https://raw.githubusercontent.com/TheEconomist/graphic-detail-data/master/data/2018-10-20_space-launches/launches.csv")
Parsed with column specification:
cols(
  tag = col_character(),
  JD = col_double(),
  launch_date = col_date(format = ""),
  launch_year = col_double(),
  type = col_character(),
  variant = col_character(),
  mission = col_character(),
  agency = col_character(),
  state_code = col_character(),
  category = col_character(),
  agency_type = col_character()
)
# Drop the `tag` and `JD` columns.
# NOTE(review): this calls the S3 method within.data.frame() directly instead of
# going through the generic or a dplyr verb.
launches = within.data.frame(launches, rm(tag, JD))
# Store the categorical columns as factors.
launches = launches %>%
  mutate(
    type = as.factor(type), 
    variant = as.factor(variant),
    state_code = as.factor(state_code), 
    category = as.factor(category),
    agency_type = as.factor(agency_type)
    )
# Keep only rows whose launch_date is today or earlier.
launches = launches %>% filter(launch_date<=Sys.Date())

# Attach countrycode, which supplies countrycode() for the recoding step that
# follows. NOTE(review): require() returns FALSE instead of failing when the
# package is missing.
require(countrycode)
Loading required package: countrycode
# Recode the source's state codes to two-letter codes ("SU" and "RU" both become
# "RU"; "F", "I-ESA" and "I-ELDO" all become "FR"; "J" -> "JP"; "I" -> "IT";
# "CYM" -> "KY"; "UK" -> "GB"), then translate the codes to country names with
# countrycode() and store the result as a factor again.
# NOTE(review): the "FR" key appears twice in the fct_collapse() call.
launches = launches %>%
  mutate(
    state_code = fct_collapse(
      state_code,
      "RU" = c("SU", "RU"),
      "FR" = "F",
      "JP" = "J",
      "IT" = "I",
      "FR" = c("I-ESA", "I-ELDO"),
      "KY" = "CYM",
      "GB" = "UK")
    ) %>%
  mutate(state_code = countrycode(state_code, "iso2c", "country.name"),
         state_code = as.factor(state_code))
# Read The Economist's space-launches CSV (agencies table) directly from GitHub.
agencies = read_csv("https://raw.githubusercontent.com/TheEconomist/graphic-detail-data/master/data/2018-10-20_space-launches/agencies.csv")
Parsed with column specification:
cols(
  agency = col_character(),
  count = col_double(),
  ucode = col_character(),
  state_code = col_character(),
  type = col_character(),
  class = col_character(),
  tstart = col_character(),
  tstop = col_character(),
  short_name = col_character(),
  name = col_character(),
  location = col_character(),
  longitude = col_character(),
  latitude = col_character(),
  error = col_character(),
  parent = col_character(),
  short_english_name = col_character(),
  english_name = col_character(),
  unicode_name = col_character(),
  agency_type = col_character()
)
# Parse the agency start/stop dates and store agency_type as a factor.
# NOTE(review): the "%Y %b %d" format only matches complete dates; the output
# recorded right after this call reports 55 (tstart) and 63 (tstop) parsing
# failures for values such as "1960", "1997 Jul", "-" and "*".
agencies = agencies %>%
  mutate(
    tstart = parse_date(as.character(tstart), "%Y %b %d"),
    tstop = parse_date(as.character(tstop), "%Y %b %d"),
    agency_type = as.factor(agency_type)
  )
Problem with `mutate()` input `tstart`.
ℹ 55 parsing failures.
row col           expected   actual
  1  -- date like %Y %b %d 1960    
  7  -- date like %Y %b %d 1997 Jul
  9  -- date like %Y %b %d 2004    
 10  -- date like %Y %b %d 1993    
 11  -- date like %Y %b %d 1995    
... ... .................. ........
See problems(...) for more details.

ℹ Input `tstart` is `parse_date(as.character(tstart), "%Y %b %d")`.55 parsing failures.
row col           expected   actual
  1  -- date like %Y %b %d 1960    
  7  -- date like %Y %b %d 1997 Jul
  9  -- date like %Y %b %d 2004    
 10  -- date like %Y %b %d 1993    
 11  -- date like %Y %b %d 1995    
... ... .................. ........
See problems(...) for more details.
Problem with `mutate()` input `tstop`.
ℹ 63 parsing failures.
row col           expected   actual
  1  -- date like %Y %b %d 1991 Dec
  2  -- date like %Y %b %d 1991    
  3  -- date like %Y %b %d -       
  4  -- date like %Y %b %d -       
  5  -- date like %Y %b %d *       
... ... .................. ........
See problems(...) for more details.

ℹ Input `tstop` is `parse_date(as.character(tstop), "%Y %b %d")`.63 parsing failures.
row col           expected   actual
  1  -- date like %Y %b %d 1991 Dec
  2  -- date like %Y %b %d 1991    
  3  -- date like %Y %b %d -       
  4  -- date like %Y %b %d -       
  5  -- date like %Y %b %d *       
... ... .................. ........
See problems(...) for more details.
# Print the converted agencies table.
agencies
# Row counts of `launches` per launch year and agency type, as a table ...
launches %>%
  count(launch_year, agency_type)


# ... as a static ggplot2 line chart (one line per agency type) ...
launches %>%
  count(launch_year, agency_type) %>%
  ggplot(aes(launch_year, n, color= agency_type)) +geom_line() +
  labs(x = "Year", y = "Launch Counts", color="Agency Type")


# ... and as a plotly line chart of the same counts.
launches %>%
  count(launch_year, agency_type) %>%
  plot_ly(x = ~launch_year, y = ~n, color=~agency_type, type = 'scatter', mode = 'lines')
`arrange_()` is deprecated as of dplyr 0.7.0.
Please use `arrange()` instead.
See vignette('programming') for more help
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
# Jittered scatter: one point per row of `launches`, launch date on x, country
# on y, coloured by agency type, with one facet row per agency type.
# NOTE(review): the reordered `type` factor is not mapped to any aesthetic, and
# the x label and title mention agencies although x is launch_date.
launches %>% 
  mutate(type = fct_reorder(type, launch_date, min)) %>%
  ggplot(aes(x=launch_date, y=state_code, color=agency_type)) + 
  geom_jitter(alpha=0.2, height = 0.2) + 
  theme_minimal() +
  facet_grid(agency_type~., scales = 'free') + 
  labs(x= "# of Agencies", 
       y="", 
       color='Agency Type',
       title = '# of Agencies in different countries Countries')


# plotly bar chart of row counts per agency type and country on a log x axis.
# NOTE(review): `n` counts rows of `launches`, while the axis title and chart
# title say "# of Agencies".
launches %>% 
  count(agency_type, state_code, sort=T) %>%
  plot_ly(y = ~state_code, x = ~n, color = ~agency_type, type="bar") %>%
  layout(legend = list(title=list(text='Agency Type')), 
         xaxis = list(title = "# of Agencies", type = "log"), 
         yaxis = list(title = ""), 
         title = '# of Agencies in different countries Countries')
# Yearly row counts per country: fct_lump() keeps the 6 most frequent countries
# and lumps the rest together; fct_reorder(-n, sum) then orders the levels from
# the largest total count to the smallest. Drawn as a ggplot2 line chart.
launches %>%
  mutate(state_code = fct_lump(state_code, 6)) %>%
  count(launch_year, state_code, sort=T) %>%
  mutate(state_code = fct_reorder(state_code, -n, sum))%>%
  ggplot(aes(launch_year, n, color=state_code)) + geom_line() + 
  labs(x= "Launch Year", 
       y = "Launch Counts", 
       color= "Countries", 
       title= "Yearly Launch Counts wrt Countries")


# Same data preparation, drawn as plotly lines with matching titles.
launches %>%
  mutate(state_code = fct_lump(state_code, 6)) %>%
  count(launch_year, state_code, sort=T) %>%
  mutate(state_code = fct_reorder(state_code, -n, sum)) %>%
  plot_ly(x = ~launch_year, y = ~n, color=~state_code) %>% 
  add_lines() %>% 
  layout(legend = list(title=list(text='Countries')), 
         xaxis = list(title = "Launch Year"), 
         yaxis = list(title = "Launch Counts"), 
         title = 'Yearly Launch Counts wrt Countries')
# Rows of `launches` whose agency_type is "private" or "startup", joined to the
# agency names: bar chart of row counts per agency name, filled by country, one
# facet row per country with the facet strip text hidden.
# NOTE(review): the title says "Yearly", but no year is mapped in this chart.
launches %>%
  filter(agency_type%in%c("private","startup")) %>%
  inner_join(agencies %>% select(agency, name, short_name, parent), by = 'agency')  %>%
  ggplot(aes(y=name, fill = state_code)) + geom_bar() + 
  facet_grid(state_code~., scales = 'free', space = 'free') + 
  labs(x= "Launch Counts", y = "", 
       fill= "Countries", 
       title= "Yearly Launch Counts wrt Private owned Agencies in different countries") +
  theme(strip.text.y = element_blank())



# Same filter and join, counted per launch year and country, drawn as stacked
# plotly bars.
# NOTE(review): `legend = T` is passed where plotly's layout takes a legend
# list (the on/off switch is `showlegend`); both x and y are numeric and no
# orientation is given; the output recorded after this call repeats
# "n too large, allowed maximum for palette Dark2 is 8".
launches %>%
  filter(agency_type%in%c("private","startup")) %>%
  inner_join(agencies %>% select(agency, name, short_name, parent), by = 'agency') %>% 
  count(launch_year, state_code, sort=T) %>%
  mutate(state_code=fct_reorder(state_code, launch_year)) %>%
  plot_ly(x=~n, y= ~launch_year, color=~state_code, colors="Dark2", type='bar') %>%
  layout(barmode='stack', 
         legend = T, 
         xaxis = list(title = "Launch Counts"), 
         yaxis = list(title = "", showticklabels = F), 
         title = "Launch Counts of Companies not handled by Government")
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
# Stacked plotly chart of row counts per country (ordered by count) and agency
# type, on a log x axis with hidden x tick labels.
# NOTE(review): no trace type is given; the output recorded after this call
# shows plotly falling back to a 'bar' trace.
launches %>%
  count(agency_type, state_code, sort=T) %>% 
  mutate(state_code = fct_reorder(state_code, n)) %>%
  plot_ly(x=~n, y=~state_code, color=~agency_type) %>%
  layout(barmode='stack', 
         xaxis=list(type='log', title = "", showticklabels = F), 
         yaxis=list(title = ""), 
         title= 'Space Programs among Countries')
No trace type specified:
  Based on info supplied, a 'bar' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#bar
No trace type specified:
  Based on info supplied, a 'bar' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#bar

# Per vehicle type among rows with state_code == 'Russia': first and last
# launch year plus the row count, sorted by descending count and restricted to
# types whose count is at least the mean count across types.
# NOTE(review): summarise() is called without `.groups`; the regrouping message
# is recorded right after this call.
russian_vehicles = launches %>% 
  filter(state_code=='Russia') %>%
  group_by(type, state_code) %>% 
  summarise(earliest=min(launch_year), latest=max(launch_year), counts=n()) %>%
  ungroup()%>%
  arrange(-counts) %>% filter(counts>=mean(counts))
`summarise()` regrouping output by 'type' (override with `.groups` argument)
# Timeline of the vehicle types kept in `russian_vehicles`: one jittered point
# per row, types ordered by their earliest launch date.
# NOTE(review): theme_minimal() is added after theme(legend.position = 'none'),
# and a complete theme replaces earlier theme settings; the legend is hidden
# here through show.legend = F. The subtitle mentions 30 launches, while
# `russian_vehicles` is filtered on the mean count.
launches %>% 
  semi_join(russian_vehicles, by='type')%>%
  mutate(type = fct_reorder(type, launch_date, min)) %>%
  ggplot(aes(x=launch_date, y=type, color=type)) + 
  geom_jitter(alpha=0.2, height = 0.2, show.legend = F) + 
  theme(legend.position = 'none')+ theme_minimal()+
  labs(title = 'Russian Space Vehicle Timeline',
       subtitle = "Only greater than 30 launches",
       x= "Launch Date", y="Vehicle")


# Same kind of timeline for rows with state_code == 'United States', coloured
# by agency type. add_count() attaches each type's row count as `n`; rows are
# kept when that count is at least the mean of `n` over all remaining rows.
launches %>% 
  filter(state_code=='United States')%>%
  add_count(type) %>% filter(n>=mean(n)) %>%
  mutate(type = fct_reorder(type, launch_date, min)) %>%
  ggplot(aes(x=launch_date, y=type, color=agency_type)) + 
  geom_jitter(alpha=0.2, height = 0.2) + theme_minimal()+
  labs(title = 'US Space Vehicle Timeline',
       x= "Launch Date", y="Vehicle", color='Agency Type')

---
title: "R Notebook"
output: html_notebook
---
```{r}
library(tidyverse)
library(janitor)
library(lubridate)
library(plotly)
```


```{r}
# Load the TidyTuesday (2020-07-14) astronauts data set straight from GitHub.
astronauts <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-14/astronauts.csv"
)

astronauts <- astronauts %>%
  mutate(
    # Round-trip each year column through a "%Y" date parse and keep only the
    # year. The columns are reached through mutate()'s data mask (via across())
    # instead of `astronauts$...`, so the step always operates on the rows that
    # are actually flowing through the pipe.
    across(
      c(year_of_birth, year_of_selection, year_of_mission),
      ~ year(parse_date(as.character(.x), "%Y"))
    ),
    # Every categorical column becomes a factor.
    across(
      c(
        sex, nationality, selection, mission_number, occupation,
        mission_title, ascend_shuttle, in_orbit, descend_shuttle
      ),
      as.factor
    )
  )

# Show the converted table in the notebook output.
astronauts
```


```{r}
# Load The Economist's space-launch records and prepare them for plotting:
# drop unused columns, store categoricals as factors, discard rows dated in the
# future, and replace the source's state codes with country names.
launches <- read_csv(
  "https://raw.githubusercontent.com/TheEconomist/graphic-detail-data/master/data/2018-10-20_space-launches/launches.csv"
) %>%
  # `tag` and `JD` are not referenced by any later chunk in this notebook.
  select(-tag, -JD) %>%
  mutate(
    across(c(type, variant, state_code, category, agency_type), as.factor)
  ) %>%
  # Keep only launches dated today or earlier.
  filter(launch_date <= Sys.Date()) %>%
  mutate(
    # Map the source's own codes onto ISO two-letter codes. Each target code is
    # listed once; "I-ESA" and "I-ELDO" are folded into "FR" as before.
    state_code = fct_collapse(
      state_code,
      "RU" = c("SU", "RU"),
      "FR" = c("F", "I-ESA", "I-ELDO"),
      "JP" = "J",
      "IT" = "I",
      "KY" = "CYM",
      "GB" = "UK"
    ),
    # Namespaced call: no require()/library() side effect in the middle of the
    # chunk, and a missing package fails loudly right here.
    state_code = as.factor(
      countrycode::countrycode(
        as.character(state_code), "iso2c", "country.name"
      )
    )
  )
```


```{r}
# Parse the partially specified dates found in the agencies table.
#
# `tstart` / `tstop` mix full dates ("YYYY Mon DD"), year-month ("1997 Jul") and
# bare years ("1960"), plus the placeholders "-" and "*". A single
# "%Y %b %d" format rejects everything except full dates, so the formats are
# tried in turn, most specific first.
#
# x: character vector of date strings.
# Returns a Date vector; placeholders become NA. Any other value that matches
# none of the formats also becomes NA, and lubridate warns with the number of
# failures.
parse_partial_date <- function(x) {
  x <- na_if(na_if(x, "-"), "*")
  as_date(parse_date_time(x, orders = c("Ybd", "Yb", "Y")))
}

# Load The Economist's agencies table, parse its start/stop dates and store the
# agency type as a factor.
agencies <- read_csv(
  "https://raw.githubusercontent.com/TheEconomist/graphic-detail-data/master/data/2018-10-20_space-launches/agencies.csv"
) %>%
  mutate(
    across(c(tstart, tstop), parse_partial_date),
    agency_type = as.factor(agency_type)
  )

# Show the converted table in the notebook output.
agencies
```


```{r}
# Launch records per year and agency type, shown three ways.

# 1. As a plain table.
launches %>% count(launch_year, agency_type)

# 2. As a static line chart, one line per agency type.
launches %>%
  count(launch_year, agency_type) %>%
  ggplot(mapping = aes(x = launch_year, y = n, color = agency_type)) +
  geom_line() +
  labs(
    x = "Year",
    y = "Launch Counts",
    color = "Agency Type"
  )

# 3. As an interactive plotly line chart of the same counts.
launches %>%
  count(launch_year, agency_type) %>%
  plot_ly(
    x = ~launch_year,
    y = ~n,
    color = ~agency_type,
    type = "scatter",
    mode = "lines"
  )
```

```{r}
# One jittered point per launch record: launch date on x, country on y, with
# one facet row per agency type. The former fct_reorder() of `type` is gone
# because `type` is not mapped in this chart, and the labels now describe what
# is actually plotted (dates of launches, not numbers of agencies).
launches %>%
  ggplot(aes(x = launch_date, y = state_code, color = agency_type)) +
  geom_jitter(alpha = 0.2, height = 0.2) +
  theme_minimal() +
  facet_grid(agency_type ~ ., scales = "free") +
  labs(
    x = "Launch Date",
    y = "",
    color = "Agency Type",
    title = "Launches over Time by Country and Agency Type"
  )

# Interactive companion: number of launch records per country and agency type
# on a log-scaled x axis. `n` counts launches, so the axis says so.
launches %>%
  count(agency_type, state_code, sort = TRUE) %>%
  plot_ly(y = ~state_code, x = ~n, color = ~agency_type, type = "bar") %>%
  layout(
    legend = list(title = list(text = "Agency Type")),
    xaxis = list(title = "# of Launches", type = "log"),
    yaxis = list(title = ""),
    title = "Launches by Country and Agency Type"
  )
```

```{r}
# Yearly launch counts for the six most frequent countries (all others lumped
# together). Levels are ordered by descending total count so the legend lists
# the busiest country first.

# Static version.
launches %>%
  mutate(state_code = fct_lump(state_code, 6)) %>%
  count(launch_year, state_code, sort = TRUE) %>%
  mutate(state_code = fct_reorder(state_code, -n, sum)) %>%
  ggplot(mapping = aes(x = launch_year, y = n, color = state_code)) +
  geom_line() +
  labs(
    x = "Launch Year",
    y = "Launch Counts",
    color = "Countries",
    title = "Yearly Launch Counts wrt Countries"
  )

# Interactive version built from the same preparation steps.
launches %>%
  mutate(state_code = fct_lump(state_code, 6)) %>%
  count(launch_year, state_code, sort = TRUE) %>%
  mutate(state_code = fct_reorder(state_code, -n, sum)) %>%
  plot_ly(x = ~launch_year, y = ~n, color = ~state_code) %>%
  add_lines() %>%
  layout(
    legend = list(title = list(text = "Countries")),
    xaxis = list(title = "Launch Year"),
    yaxis = list(title = "Launch Counts"),
    title = "Yearly Launch Counts wrt Countries"
  )
```


```{r}
# Launches by private and startup agencies, with the agency's full name
# attached from the agencies table.

# Launch records per agency, one facet row per country (strip labels hidden
# because the fill legend already names the countries). The chart has no year
# dimension, so the title no longer claims to be yearly.
launches %>%
  filter(agency_type %in% c("private", "startup")) %>%
  inner_join(
    agencies %>% select(agency, name, short_name, parent),
    by = "agency"
  ) %>%
  ggplot(aes(y = name, fill = state_code)) +
  geom_bar() +
  facet_grid(state_code ~ ., scales = "free", space = "free") +
  labs(
    x = "Launch Counts",
    y = "",
    fill = "Countries",
    title = "Launch Counts of Privately Owned Agencies by Country"
  ) +
  theme(strip.text.y = element_blank())

# Yearly launch counts per country as stacked horizontal bars.
launches %>%
  filter(agency_type %in% c("private", "startup")) %>%
  inner_join(
    agencies %>% select(agency, name, short_name, parent),
    by = "agency"
  ) %>%
  count(launch_year, state_code, sort = TRUE) %>%
  mutate(state_code = fct_reorder(state_code, launch_year)) %>%
  plot_ly(
    x = ~n,
    y = ~launch_year,
    color = ~state_code,
    colors = "Dark2",
    type = "bar",
    # Both axes are numeric, so plotly cannot infer that the bars should run
    # along x; without this they are drawn vertically with the year as height.
    orientation = "h"
  ) %>%
  layout(
    barmode = "stack",
    # `legend` takes a list of legend settings; the on/off switch is
    # `showlegend`.
    showlegend = TRUE,
    xaxis = list(title = "Launch Counts"),
    yaxis = list(title = "Launch Year"),
    title = "Launch Counts of Companies not handled by Government"
  )
```

```{r}
# Stacked bars of launch records per country, split by agency type, with
# countries ordered by count. The x axis is log-scaled and its tick labels are
# hidden.
launches %>%
  count(agency_type, state_code, sort = TRUE) %>%
  mutate(state_code = fct_reorder(state_code, n)) %>%
  # The trace type is stated explicitly so plotly does not have to guess it
  # (and print a "No trace type specified" message) on every render.
  plot_ly(x = ~n, y = ~state_code, color = ~agency_type, type = "bar") %>%
  layout(
    barmode = "stack",
    xaxis = list(type = "log", title = "", showticklabels = FALSE),
    yaxis = list(title = ""),
    title = "Space Programs among Countries"
  )

# Russian vehicle types with an above-average launch count: first and last
# launch year plus the number of launch records per type, busiest first.
russian_vehicles <- launches %>%
  filter(state_code == "Russia") %>%
  group_by(type, state_code) %>%
  summarise(
    earliest = min(launch_year),
    latest = max(launch_year),
    counts = n(),
    # Return an ungrouped result directly; this also avoids the "regrouping
    # output" message and the separate ungroup() call.
    .groups = "drop"
  ) %>%
  arrange(desc(counts)) %>%
  filter(counts >= mean(counts))

# Timeline of those vehicle types, ordered by their earliest launch date.
launches %>%
  semi_join(russian_vehicles, by = "type") %>%
  mutate(type = fct_reorder(type, launch_date, min)) %>%
  ggplot(aes(x = launch_date, y = type, color = type)) +
  geom_jitter(alpha = 0.2, height = 0.2, show.legend = FALSE) +
  # A complete theme resets earlier theme() tweaks, so it has to come first.
  theme_minimal() +
  theme(legend.position = "none") +
  labs(
    title = "Russian Space Vehicle Timeline",
    # The selection above is relative to the mean count, not a fixed number.
    subtitle = "Only vehicles with at least the average number of launches",
    x = "Launch Date",
    y = "Vehicle"
  )

# Timeline of US vehicle types, coloured by agency type. Each row carries its
# type's launch count as `n`; rows are kept when that count is at least the
# mean of `n` across the US rows. Types are ordered by earliest launch date.
launches %>%
  filter(state_code == "United States") %>%
  add_count(type) %>%
  filter(n >= mean(n)) %>%
  mutate(type = fct_reorder(type, launch_date, min)) %>%
  ggplot(mapping = aes(x = launch_date, y = type, color = agency_type)) +
  geom_jitter(alpha = 0.2, height = 0.2) +
  theme_minimal() +
  labs(
    title = "US Space Vehicle Timeline",
    x = "Launch Date",
    y = "Vehicle",
    color = "Agency Type"
  )

```

